# Machine-specific project root. Every relative path below ("code/...",
# "final-data/...") is resolved against this directory, so this line has to be
# edited when the replication files live somewhere else.
setwd("d:/PORES Dropbox/Stephen Pettigrew/ranked-choice-voting/replication files")

# library() stops immediately when tidyverse is not installed; require() would
# only warn and return FALSE, postponing the failure to the first pipe below.
library(tidyverse)
options(scipen = 999) # strongly prefer fixed over scientific notation when printing
source("code/geography-colors.R") # presumably defines `colors`, used further down -- confirm



# Input data; the code below reads its `errors` element.
raw <- readRDS("final-data/all-errors-and-votes.RDS")

# Error rates by office-----

# Collapse a (typically grouped) data frame to one row per group.
#
# Arguments:
#   x             data frame / grouped tibble with an `overvote` column
#   include.count keep the per-group row count (`count`) in the result?
#
# Returns an ungrouped tibble with the grouping columns of `x` (if any),
# `overvote.rate` (the mean of `overvote`) and, when `include.count` is TRUE,
# `count`. mean() is called without na.rm, so a missing `overvote` value makes
# that group's rate NA.
summarize.rates <- function(x, include.count = FALSE){
  z <- x %>%
    summarize(count = n(),
              overvote.rate = mean(overvote),
              .groups = "drop")
  # `count` is always computed and then discarded unless it was requested
  if (!include.count) z$count <- NULL
  z
}



# Overvote rate for every geography/date/office.id/rcv combination, after
# dropping rows whose geography is "ME" and rows with an empty bubble pattern.
# `pct` holds the rate, `id` the two-line axis label used by the plot, and
# `rcv` is recoded from logical to "Yes"/"No".
errors.by.office <- raw$errors %>%
  filter(geography != "ME",
         bubble.pattern != "") %>%
  group_by(geography, date, office.id, rcv) %>%
  summarize.rates() %>%
  ungroup() %>%
  # Label: geography on the first line, abbreviated month and year below.
  # ymd() is namespaced because library(tidyverse) only attaches lubridate
  # from tidyverse 2.0.0 onwards.
  mutate(id = paste(geography,
                    format(lubridate::ymd(date), "%b. '%y"),
                    sep = "\n")) %>%
  rename(pct = overvote.rate) %>%
  mutate(rcv = ifelse(rcv, "Yes", "No"))

# Mean office-level overvote rate per geography/date/rcv, with a 95%
# normal-approximation interval based on the standard error across offices.
# Groups containing a single office get NA for `se`, `lower` and `upper`
# because sd() of one value is NA.
errors.by.geo <- errors.by.office %>%
  group_by(geography, date, id, rcv) %>%
  # Order matters: `se` has to be computed from the office-level `pct` values
  # before `pct` is overwritten by their mean; `lower`/`upper` use that mean.
  summarize(se = sd(pct) / sqrt(n()),
            pct = mean(pct),
            lower = pct + qnorm(.025) * se,
            upper = pct + qnorm(.975) * se,
            .groups = "drop") %>%
  # Grouping is dropped first so the grouping column is not rewritten while
  # the data are still grouped by it.
  # `colors` is presumably the named vector from code/geography-colors.R --
  # confirm; geographies missing from its names become NA.
  mutate(geography = factor(geography, levels = names(colors)))

# Pooled across all elections: mean office-level overvote rate for RCV and
# non-RCV contests, its standard error, a 95% normal-approximation interval
# and a percent label formatted with two decimals.
errors.overall <- errors.by.office %>%
  group_by(rcv) %>%
  # `se` first: it needs the office-level values, not the mean that replaces them
  summarize(se = sd(pct) / sqrt(n()),
            pct = mean(pct)) %>%
  ungroup() %>%
  mutate(lower = pct + qnorm(.025) * se,
         upper = pct + qnorm(.975) * se,
         pct.label = sprintf("%.2f%%", pct * 100))


errors.overall # top-line error rates to report in text of paper
errors.by.geo # error rates by geography


# Side-by-side comparison per geography/date: one column per RCV status
# ("No", "Yes") and the ratio of the RCV rate to the non-RCV rate. An election
# lacking one of the two statuses gets NA in that column and in `ratio`.
# pivot_wider() replaces the superseded spread(); names_sort and arrange()
# keep spread()'s sorted column and row order.
errors.by.geo %>%
  select(geography, date, rcv, pct) %>%
  pivot_wider(names_from = rcv, values_from = pct, names_sort = TRUE) %>%
  arrange(geography, date) %>%
  mutate(ratio = Yes / No)


# Text labels and their x/y positions for the two race types.
# NOTE(review): not referenced by the plot code visible below, and `rcv` is
# logical here while the plotted data use "Yes"/"No" -- confirm whether this
# object is still needed. As a non-function object it does not stop calls to
# ggplot2::labs() from resolving.
labs <- data.frame(lab = c("Ranked\nchoice\nraces",
                           "Non-ranked\nchoice\nraces"),
                   rcv = c(TRUE, FALSE),
                   #color = c("blue","red"),
                   x = c(2.2,1.2),
                   y = .0125)

# Overvote rate by election: one point per office, plus the election-level
# mean and its 95% interval, split by whether the race used ranked choice.
ggplot() +

  # dots for each office, dodged left/right by RCV status
  geom_point(mapping = aes(x = id, y = pct, shape = rcv),
             data = errors.by.office,
             position = position_dodge(width = .5),
             alpha = .5) +

  # colored bars for each election avg: ymin == ymax collapses the error bar
  # into a single horizontal tick
  geom_errorbar(mapping = aes(ymin = pct,
                              ymax = pct,
                              x = id,
                              color = rcv),
                data = errors.by.geo,
                width = .6, # horiz width (constant, so set outside aes())
                linewidth = .5, # vert thickness
                position = position_dodge(width = .5)) +

  # 95% interval around the avg, drawn without end caps (width = 0).
  # `linewidth` replaces `size`, which is deprecated for lines since
  # ggplot2 3.4.0 and was inconsistent with the layer above.
  geom_errorbar(mapping = aes(ymin = lower,
                              ymax = upper,
                              x = id,
                              color = rcv),
                data = errors.by.geo,
                width = 0,
                linewidth = .6,
                position = position_dodge(width = .5)) +

  scale_y_continuous("Percent of voters who overvoted",
                     labels = scales::percent_format(.1),
                     breaks = seq(0,1,.005)) +
  xlab("") +
  theme_bw() +
  # identical legend titles for color and shape
  scale_color_manual("Ranked\nchoice\nrace?", values = c("No"="red", "Yes"="blue")) +
  scale_shape_discrete("Ranked\nchoice\nrace?")



